" Importing Data In R "

" In .csv format "

# Syntax:
# <variable> = read.csv("Path",header = )


" Using Path "
# Read the CSV from an explicit location; the first row supplies column names.
Mtcars <- read.csv(
  file = "C:\\Users\\tanis\\Desktop\\Test Work.csv",
  header = TRUE
)

" Using file.choose() "
# Pick the CSV interactively through the file-selection dialog.
Mtcars <- read.csv(file = file.choose())


" In .txt format "

# Syntax:
# <variable> = read.table("Path",header = ,sep = )

" Using Path "
# read.table() has no default for `file`, so a bare call stops with an error.
# NOTE(review): the path mirrors the CSV example above - point it at the real
# .txt file; `sep = ","` assumes a comma-delimited file - confirm.
Mtcars <- read.table(
  "C:\\Users\\tanis\\Desktop\\Test Work.txt",
  header = TRUE,
  sep = ","
)

" Using file.choose() "
# Interactive variant: the chosen file is parsed as comma-separated text
# whose first row holds the column names.
Multi <- read.table(
  file = file.choose(),
  header = TRUE,
  sep = ","
)
 
  

" In .xlsx format "

# Syntax:
# read_xlsx("Path",sheet = )


# Install readxl only when it is missing, instead of re-downloading it on
# every run of the script.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

library("readxl")

# Read the sheet named "Product" from an interactively chosen workbook;
# the first row is used as column names.
Data <- read_xlsx(file.choose(),
                  sheet = "Product",
                  col_names = TRUE)



" Exporting Data From R "

" 3 components required for exporting a data
  from R.
  1.) The Data we want to export
  2.) The location / path at which we want to 
      paste the data
  3.) The name with which the file should be 
      stored along with appropriate extension "


" In .csv format "

# Syntax:
# write.csv(<Data Name>,"Path")

# Export the built-in iris data set as a CSV file on the Desktop.
write.csv(x = iris, file = "C:/Users/tanis/Desktop/Sample Export.csv")


" In .xlsx format "

# Syntax:
# write_xlsx(<Data Name>,"Path")


library(writexl)

# write.csv() without a `file` argument prints the data to the console and
# produces no workbook. write_xlsx() takes the data frame and the destination
# path (file name with the .xlsx extension).
write_xlsx(iris, "C:/Users/tanis/Desktop/Sample Export.xlsx")


" In .txt format "

# Syntax:
# write.table(<Data Name>,"Path",sep = )

# NOTE(review): within the visible script `data` is not assigned until later
# (here it still resolves to utils::data), and "Path" was only a placeholder.
# Export a real data frame to an explicit file, mirroring the CSV example.
write.table(iris, "C:/Users/tanis/Desktop/Sample Export.txt", sep = ",")


" Creating a New column based on 
  Conditional Statements using ifelse() "

# Syntax:
# ifelse(condition, if-true, if-false)

" Eg.
  Load the inbuilt data 'mtcars'
  and classify each obs on the below
  criteria:

  If 'mpg' <= 20 : SUV
     'mpg' (20,25] : Sedan
     'mpg' (25,30] : Mid SUV
     'mpg' > 30: Hatchback "

data <- mtcars

# Nested ifelse(): the first condition that holds decides the label.
data$type_4 <-
  ifelse(data$mpg <= 20, "SUV",
         ifelse(data$mpg <= 25,
                "Sedan",
                ifelse(data$mpg <= 30,
                       "Mid SUV", "Hatchback")))

# `type_3` must exist before it is compared: a missing column is NULL, and
# comparing NULL yields an empty vector whose sum is always 0. Build it
# independently with cut() (right-closed intervals) so the cross-check is real.
data$type_3 <- as.character(
  cut(data$mpg,
      breaks = c(-Inf, 20, 25, 30, Inf),
      labels = c("SUV", "Sedan", "Mid SUV", "Hatchback"))
)

# Number of rows where the two classifications disagree (expected: 0).
sum(data$type_3 != data$type_4)
data$check <- "OK"

# No label equals "OK", so this counts every row.
sum(data$type_3 != data$check)


# Same classification idea with tighter cut-offs, done as a lookup:
# findInterval() with left.open = TRUE returns 0 for mpg <= 10, 1 for (10, 15],
# 2 for (15, 20] and 3 for mpg > 20; adding 1 turns that into a label index.
data$Class <- c("SUV", "Sedan", "Mid SUV", "Hatchback")[
  findInterval(data$mpg, c(10, 15, 20), left.open = TRUE) + 1L
]


" Q. How is this different than
     if - else if.

     Also, solve the above problem using
     if - else if "

# Start from an empty label so every row has a value before the loop runs.
data$Class <- ""

# if / else evaluates one scalar condition at a time, so each row is visited
# explicitly. seq_len() yields an empty sequence for a zero-row data frame,
# whereas 1:nrow(data) would iterate over c(1, 0).
for (i in seq_len(nrow(data))) {
  if (data$mpg[i] <= 15) {
    data$Class[i] <- "SUV"
  } else {
    data$Class[i] <- "Others"
  }
}



" Summarizing Data Using DPLYR "

" At times, we may want to find
  information at an overall level
  by creating Summaries.

  Similar to Pivot tables in Excel 

 For example:
      Total Business written by a General Ins
      Co. across all states in India

      Total Profit generated by each customer
      on Amazon "



' Pipe Operator (%>%) '
# Syntax:
# Data %>% group_by() %>% summarize()

" Understanding:
  group_by() takes variables against which
  the grouping of data is to be done 
  (Specifying rows in Excel)

  summarise() takes the variables and the
  function over which the aggregation
  is to be made "

" Eg.
  Import the dataset 'Sales' and understand
  the Data.

  1.)
  Find out the Total Profit generated by
  each unique Customer. Also, find out the
  bad customers.

  2.) 
  Can we say that the average order value
  between the different Ship Modes is
  different?

"

# dplyr supplies %>%, group_by(), summarise() and arrange(); it has to be
# attached before the first pipe is evaluated.
library(dplyr)

Sales <- read.csv(file.choose())

# 1.) Total profit per customer.
P <- Sales %>%
  group_by(Customer.ID) %>%
  summarise(Total = sum(Profit))

# Ascending order puts the lowest totals first.
P <- arrange(P, Total)

# A negative total profit tags the customer as "Bad".
P$Tag <- ifelse(P$Total < 0,
                "Bad",
                "Good")

# 2.) Average per ship mode, rounded once to 2 decimals (rounding to a whole
# number first would make a later round(, 2) a no-op).
# NOTE(review): assumes the file has a column named `Sales`; inside
# summarise() that name refers to the column, not the data frame - confirm.
Q <- Sales %>%
  group_by(Ship.Mode) %>%
  summarise(Avg = round(mean(Sales), 2))



library(dplyr)

# Load the sales data from its fixed location on disk.
Sales <- read.csv(file = "C:/Users/tanis/Desktop/Data Files/Sales.csv")

# One row per Customer.ID holding the sum of Profit.
Profit_Agg <- summarise(
  group_by(Sales, Customer.ID),
  Total_Profit = sum(Profit)
)
  

library(dplyr)

# Mean and standard deviation of mpg for every cyl / am combination.
summarise(
  group_by(mtcars, cyl, am),
  Avg_Mileage = mean(mpg),
  Sd_Mileage = sd(mpg)
)






" H: 30.06
  Sedan: 19.22
  SUV : 13.01 "

# NOTE(review): group_by() with no arguments leaves the data ungrouped, so the
# summary collapses to one overall row. The per-type averages in the note
# above match mpg bands of <= 15 (SUV), (15, 25] (Sedan) and > 25 (H), up to
# the last digit, so the data is grouped on those bands - confirm the cut-offs.
x <- mtcars %>%
  group_by(Type = cut(mpg,
                      breaks = c(-Inf, 15, 25, Inf),
                      labels = c("SUV", "Sedan", "H"))) %>%
  summarise(Avg.MPG = round(mean(mpg), 2),
            SD = round(sd(mpg), 2))



" Using apply() "

" Understanding:
  The Apply function is used to
  implement a function on a data
  either row wise / column wise "

# Syntax:
# apply(<data>, <row/column indicator>,
#                           <function to apply>)

# Drop the fifth column of iris, keeping the first four.
data <- iris[, -5]

# MARGIN = 1 applies sd() across each row; the result becomes a new column.
data[["ROW.SD"]] <- apply(data, 1, sd)

# MARGIN = 2 applies sum() down each column (ROW.SD is included by now).
apply(data, 2, sum)





# First four columns of iris.
data <- iris[, c(1, 2, 3, 4)]

# Row-wise standard deviation: one value per observation.
x <- apply(X = data, MARGIN = 1, FUN = sd)

# Column-wise mean of two mtcars variables.
apply(X = mtcars[, c("mpg", "qsec")],
      MARGIN = 2,
      FUN = mean)

# Coercion demo: c() turns every element into character first; as.numeric()
# then yields NA (with a warning) for text that is not a number.
as.numeric(c("Normal", 1, "223", 48))


data <- iris[, 1:4]

# Create the result column up front. Assigning data$Mean[i] while the column
# does not exist yet creates it on the first pass (recycled to every row), and
# every later data[i, ] would then include Mean in its own average.
data$Mean <- NA_real_

for (i in seq_len(nrow(data))) {

  # Average only the four measurement columns of row i.
  data$Mean[i] <- mean(as.numeric(data[i, 1:4]))

}

# Spot check for the first observation, restricted to the measurement columns.
mean(as.numeric(data[1, 1:4]))


" Quick Tips "

" nrow() & ncol() & dim() "
" nrow() provides the no. of rows in a DF
  ncol() provides the no. of cols in a DF 
  dim() provides the no. of rows and cols in a DF"

" Eg. "

# Number of columns in the data frame.
ncol(x = iris)
# Number of rows in the data frame.
nrow(x = iris)

# Both at once, returned as c(rows, columns).
dim(x = iris)
" Note: 1st element: Rows
        2nd element: Columns "

